package com.offcn.bigdata.spark.p1

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Spark introductory example: WordCount against a remote cluster.
  *
  * Usage: ScalaRemoteWordCountOps &lt;inputpath&gt;
  *
  * Reads the text file(s) at `inputpath` (e.g. an HDFS path), splits lines on
  * whitespace, and prints each (word, count) pair.
  */
object ScalaRemoteWordCountOps {
    def main(args: Array[String]): Unit = {
        // Require exactly the input path; exit with a non-zero status otherwise.
        if (args == null || args.length < 1) {
            println("Usage: <inputpath>")
            System.exit(-1)
        }
        // Destructure the single external argument. NOTE: this extractor throws
        // a MatchError if more than one argument is passed; the guard above only
        // checks for fewer than one, preserving the original behavior.
        val Array(inputpath) = args

        // App name now matches the object name (the original had a stray leading
        // space and the wrong name, which showed up garbled in the Spark UI).
        val conf = new SparkConf()
                .setAppName("ScalaRemoteWordCountOps")

        val sc = new SparkContext(conf)

        // Read the input (typically HDFS); partition count is driven by the
        // input splits, logged here for visibility.
        val lines = sc.textFile(inputpath)
        println("lines' partition size: " + lines.getNumPartitions)

        // Tokenize on runs of whitespace, then count occurrences per word.
        val words = lines.flatMap(_.split("\\s+"))
        val ret = words.map((_, 1)).reduceByKey(_ + _)

        // NOTE(review): on a real cluster, foreach(println) prints on the
        // EXECUTORS, not the driver console. Use ret.collect().foreach(println)
        // to see output locally — kept as-is to preserve behavior.
        ret.foreach(println)
        sc.stop()
    }
}
